notebook.community

Edit and run



In [1]:

    
import pandas as pd









    



/Users/Monica/.virtualenvs/dataanalysis/lib/python3.5/site-packages/matplotlib/__init__.py:1035: UserWarning: Duplicate key in file "/Users/Monica/.matplotlib/matplotlibrc", line #2
  (fname, cnt))



In [2]:

    
violations_df = pd.read_csv("violations.csv")
#violations_df = pd.read_csv("violations.csv", nrows=100)



In [3]:

    
# 1. I want to make sure my Plate ID is a string. Can't lose the leading zeroes!

#3. I want the dates to be dates! Read the read_csv documentation to find out how to make pandas automatically parse dates.
col_types = { 'Plate ID': 'str'}
dof_violations_df = pd.read_csv("DOF_Parking_Violation_Codes.csv", dtype=col_types, parse_dates=True)
#dof_violations_df = pd.read_csv("DOF_Parking_Violation_Codes.csv", dtype=col_types, parse_dates=True, nrows=100)



In [4]:

    
# print(violations_df.columns)



In [5]:

    
# len(violations_df)



In [6]:

    
# print(dof_violations_df.head())

I don't think anyone's car was built in 0AD. Discard the '0's as NaN.



In [7]:

    
violations_df = violations_df[violations_df['Vehicle Year'] != 0]
# print(violations_df['Vehicle Year'].head())

"Date first observed" is a pretty weird column, but it seems like it has a date hiding inside. Using a function with .apply, transform the string (e.g. "20140324") into a Python date. Make the 0's show up as NaN.



In [8]:

    
# print(violations_df['Date First Observed'].head())



In [9]:

    
import datetime
violations_df.head()['Issue Date'].astype(datetime.datetime)









    Out[9]:





0    08/04/2013
1    08/04/2013
3    08/05/2013
4    08/08/2013
6    08/11/2013
Name: Issue Date, dtype: object



In [10]:

    
import datetime
violations_df.head()['Issue Date'].astype(datetime.datetime)

#violations_df['Issue Date'] = violations_df['Issue Date'].astype('datetime64[ns]')









    Out[10]:





0    08/04/2013
1    08/04/2013
3    08/05/2013
4    08/08/2013
6    08/11/2013
Name: Issue Date, dtype: object



In [11]:

    
# violations_df.dtypes

"Violation time" is... not a time. Make it a time.



In [12]:

    
import re
# violations_df['Violation Time'].head()



In [13]:

    
import numpy as np
# Build a function using that method
def time_to_datetime(str_time):
    try:
    #str_time = re.sub('^0', '', str_time)
        if isinstance(str_time, str):
            str_time = str_time + "M"
            #print("Trying to convert", str_time, "into a time")
            return datetime.datetime.strptime(str_time.strip(), "%I%M%p").time()
    #try:
     #   if str_time == -999:
      #      print("It's -999")
       #     return np.nan
    
    except:
        return np.nan



In [14]:

    
# Apply that method to the 'Time' column of the dataframe
violations_df['Violation Time'] = violations_df['Violation Time'].apply(time_to_datetime)



In [15]:

    
print(violations_df['Violation Time'].head())









    



0    07:52:00
1    12:40:00
3    14:32:00
4    12:39:00
6    19:41:00
Name: Violation Time, dtype: object



In [16]:

    
# def remove_1900(year):
#     year = str(year)
#     year = re.sub('^1900-01-01', '', year)
#     return year

# print(violations_df['Violation Time'].apply(remove_1900))

There sure are a lot of colors of cars, too bad so many of them are the same. Make "BLK" and "BLACK", "WT" and "WHITE", and any other combinations that you notice.



In [17]:

    
import re



In [18]:

    
def vehicle_colors(vehicle):
    if isinstance(vehicle, str):
        #print(vehicle)
        vehicle = re.sub('^GY$', 'GREY', vehicle)
        #vehicle = vehicle.replace("GY", "GREY")
        vehicle = re.sub('^WH$', 'WHITE', vehicle)
        vehicle = re.sub('^BR$', 'BROWN', vehicle)
        vehicle = re.sub('^RD$', 'RED', vehicle)
        vehicle = re.sub('^B[LK]$', 'BLACK', vehicle)
        vehicle = re.sub('^TN$', 'TAN', vehicle)
        vehicle = re.sub('^YW$', 'YELLOW', vehicle)
        vehicle = re.sub('^SIL$', 'SILVER', vehicle)
        vehicle = re.sub('^GR$', 'GREEN', vehicle)
        vehicle = re.sub('^SILVE$', 'SILVER', vehicle)
       
       
        
#         vehicle = vehicle.replace("WH","WHITE")
#         vehicle = vehicle.replace("BR","BROWN")
#         vehicle = vehicle.replace("RD","RED")
#         vehicle = vehicle.replace("BL","BLACK")
#         vehicle = vehicle.replace("BK","BLACK")
#         vehicle = vehicle.replace("TN","TAN")
#         vehicle = vehicle.replace("YW","YELLOW")
#         vehicle = re.sub('SIL$', 'SILVER', vehicle)
#         vehicle = vehicle.replace("SILVR","SILVER")
#         vehicle = vehicle.replace("SIL","SILVER")
    return vehicle

violations_df['Vehicle Color'] = violations_df['Vehicle Color'].apply(vehicle_colors)



In [19]:

    
violations_df['Vehicle Color'].head()









    Out[19]:





0     GREY
1    WHITE
3    WHITE
4    BROWN
6       GN
Name: Vehicle Color, dtype: object

Join the data with the Parking Violations Code dataset from the NYC Open Data site.



In [20]:

    
print(violations_df.columns)
violations_df['Violation Code'].head()









    



Index(['Summons Number', 'Plate ID', 'Registration State', 'Plate Type',
       'Issue Date', 'Violation Code', 'Vehicle Body Type', 'Vehicle Make',
       'Issuing Agency', 'Street Code1', 'Street Code2', 'Street Code3',
       'Vehicle Expiration Date', 'Violation Location', 'Violation Precinct',
       'Issuer Precinct', 'Issuer Code', 'Issuer Command', 'Issuer Squad',
       'Violation Time', 'Time First Observed', 'Violation County',
       'Violation In Front Of Or Opposite', 'House Number', 'Street Name',
       'Intersecting Street', 'Date First Observed', 'Law Section',
       'Sub Division', 'Violation Legal Code', 'Days Parking In Effect    ',
       'From Hours In Effect', 'To Hours In Effect', 'Vehicle Color',
       'Unregistered Vehicle?', 'Vehicle Year', 'Meter Number',
       'Feet From Curb', 'Violation Post Code', 'Violation Description',
       'No Standing or Stopping Violation', 'Hydrant Violation',
       'Double Parking Violation'],
      dtype='object')






    Out[20]:





0    46
1    46
3    46
4    41
6    14
Name: Violation Code, dtype: int64



In [21]:

    
violations_df









    Out[21]:






  
    
      
      Summons Number
      Plate ID
      Registration State
      Plate Type
      Issue Date
      Violation Code
      Vehicle Body Type
      Vehicle Make
      Issuing Agency
      Street Code1
      ...
      Vehicle Color
      Unregistered Vehicle?
      Vehicle Year
      Meter Number
      Feet From Curb
      Violation Post Code
      Violation Description
      No Standing or Stopping Violation
      Hydrant Violation
      Double Parking Violation
    
  
  
    
      0
      1283294138
      GBB9093
      NY
      PAS
      08/04/2013
      46
      SUBN
      AUDI
      P
      37250
      ...
      GREY
      0
      2013
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      1
      1283294151
      62416MB
      NY
      COM
      08/04/2013
      46
      VAN
      FORD
      P
      37290
      ...
      WHITE
      0
      2012
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      3
      1283294175
      63009MA
      NY
      COM
      08/05/2013
      46
      VAN
      FORD
      P
      37270
      ...
      WHITE
      0
      2010
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      4
      1283294187
      91648MC
      NY
      COM
      08/08/2013
      41
      TRLR
      GMC
      P
      37240
      ...
      BROWN
      0
      2012
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      6
      1283294229
      GCR2838
      NY
      PAS
      08/11/2013
      14
      VAN
      NaN
      P
      37250
      ...
      GN
      0
      2011
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      8
      1283983631
      GBH9379
      NY
      PAS
      08/07/2013
      24
      SDN
      TOYOT
      X
      63430
      ...
      WHITE
      0
      2001
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      9
      1283983667
      MCL78B
      NJ
      PAS
      07/18/2013
      24
      SDN
      SUBAR
      H
      0
      ...
      BLACK
      0
      2005
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      10
      1283983679
      M367CN
      NY
      PAS
      07/18/2013
      24
      SDN
      HYUND
      H
      0
      ...
      BLUE
      0
      2010
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      11
      1283983734
      GAR6813
      NY
      PAS
      07/18/2013
      24
      SDN
      TOYOT
      H
      0
      ...
      TAN
      0
      1998
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      13
      1283983825
      GAC2703
      NY
      PAS
      08/12/2013
      24
      SDN
      NISSA
      X
      23230
      ...
      WHITE
      0
      2012
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      14
      1286036800
      40793JY
      NY
      COM
      07/05/2013
      14
      VAN
      CHEVR
      P
      34190
      ...
      GREEN
      0
      1995
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      15
      1286123550
      GAD1485
      NY
      PAS
      08/12/2013
      20
      SDN
      VOLKS
      T
      28930
      ...
      WHITE
      0
      2012
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      16
      1286246398
      GFC5338
      NY
      PAS
      07/26/2013
      14
      SDN
      TOYOT
      T
      0
      ...
      GRAY
      0
      2010
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      18
      1286248000
      GJA3452
      NY
      PAS
      07/23/2013
      14
      SDN
      KIA
      T
      73690
      ...
      WHITE
      0
      2013
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      21
      1286289841
      GAV9235
      NY
      PAS
      07/20/2013
      50
      SDN
      HONDA
      T
      0
      ...
      GREY
      0
      2012
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      23
      1286799648
      GDE3973
      NY
      PAS
      07/20/2013
      40
      SDN
      TOYOT
      T
      0
      ...
      TAN
      0
      2003
      -
      9
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      25
      1286807475
      GGL6608
      NY
      PAS
      07/26/2013
      40
      SDN
      NISSA
      T
      73650
      ...
      WHT
      0
      2013
      -
      1
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      26
      1286807633
      84301JU
      NY
      COM
      07/19/2013
      14
      P-U
      DODGE
      X
      63030
      ...
      WHITE
      0
      2001
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      28
      1287320491
      50097JX
      NY
      COM
      08/07/2013
      40
      VAN
      MERCU
      T
      23190
      ...
      RED
      0
      2003
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      29
      1287320570
      GEJ9154
      NY
      PAS
      08/06/2013
      51
      VAN
      TOYOT
      X
      93230
      ...
      GREY
      0
      2013
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      35
      1288135920
      GFM8884
      NY
      OMS
      07/24/2013
      19
      SUBN
      ME/BE
      X
      0
      ...
      BLU
      0
      2013
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      36
      1288259270
      43517LA
      NY
      PAS
      08/02/2013
      18
      SDN
      CADIL
      P
      73720
      ...
      WHT
      0
      2004
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      38
      1289971754
      XDB4574
      VA
      PAS
      08/01/2013
      40
      VAN
      TOYOT
      P
      13610
      ...
      RED
      0
      2007
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      39
      1290136210
      GGW7930
      NY
      PAS
      07/25/2013
      74
      SUBN
      MAZDA
      P
      56600
      ...
      TAN
      0
      2002
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      40
      1290834349
      429J6JM
      NY
      COM
      07/23/2013
      46
      DELV
      INTER
      P
      0
      ...
      BROWN
      0
      1996
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      41
      1290834362
      GBC9079
      NY
      PAS
      07/27/2013
      98
      SDN
      MERCU
      P
      14490
      ...
      GREY
      0
      2012
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      47
      1291896399
      G24BLB
      NJ
      PAS
      07/19/2013
      40
      SDN
      INFIN
      P
      41900
      ...
      BLACK
      0
      2011
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      48
      1291898384
      GER8956
      NY
      SRF
      07/26/2013
      14
      SDN
      CHEVR
      P
      53000
      ...
      GRAY
      0
      2012
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      49
      1291905236
      85796MC
      NY
      COM
      07/26/2013
      17
      DELV
      ISUZU
      P
      33990
      ...
      WHITE
      0
      2013
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      50
      1292755052
      GBB7685
      NY
      PAS
      07/14/2013
      19
      SDN
      LEXUS
      X
      59990
      ...
      WHITE
      0
      2013
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      53
      1292756792
      GDX8921
      99
      PAS
      07/15/2013
      19
      SDN
      HONDA
      X
      28790
      ...
      SILVER
      0
      2003
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      55
      1293030739
      GHG2484
      NY
      PAS
      08/11/2013
      46
      NaN
      FORD
      P
      26440
      ...
      BLACK
      0
      2013
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      56
      1293090530
      GES3519
      NY
      PAS
      07/07/2013
      40
      SDN
      HONDA
      F
      70630
      ...
      BLACK
      0
      1997
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      57
      1293094389
      GDS9810
      NJ
      PAS
      06/20/2013
      31
      SDN
      BMW
      P
      24890
      ...
      SILVER
      0
      2007
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      58
      1293100924
      64195MC
      NY
      COM
      06/20/2013
      46
      VAN
      WHITE
      F
      81330
      ...
      WHITE
      0
      1998
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      61
      1293101758
      GCF5651
      NY
      PAS
      05/24/2013
      17
      SUBN
      CHRYS
      F
      24990
      ...
      GREEN
      0
      2010
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      62
      1293101783
      GCF5651
      NY
      PAS
      06/24/2013
      14
      SUBN
      CHRYS
      F
      24990
      ...
      TAN
      0
      2010
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      63
      1293107761
      4N11B
      NY
      OMT
      06/07/2013
      14
      VAN
      FORD
      F
      34310
      ...
      YELLO
      0
      2012
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      64
      1293525560
      T632455C
      NY
      SRF
      07/29/2013
      14
      SDN
      LINCO
      P
      39430
      ...
      GRY
      0
      2005
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      65
      1293525625
      GGP1847
      NY
      PAS
      07/29/2013
      46
      SUBN
      HONDA
      P
      0
      ...
      WHT
      0
      2013
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      73
      1294712755
      GHF1267
      NY
      PAS
      07/21/2013
      16
      VAN
      CHRYS
      P
      14510
      ...
      BLUE
      0
      2002
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      74
      1294712779
      578032
      NY
      PAS
      07/30/2013
      17
      SDN
      AUDI
      P
      14510
      ...
      BLACK
      0
      2010
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      76
      1294712792
      GCX7943
      NY
      PAS
      07/30/2013
      17
      SDN
      AUDI
      P
      14510
      ...
      BLACK
      0
      1999
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      82
      1294712901
      GDZ4899
      NY
      PAS
      08/04/2013
      17
      SUBN
      CHRYS
      P
      14510
      ...
      RED
      0
      2002
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      84
      1294712950
      GCF7473
      NY
      PAS
      08/04/2013
      17
      SDN
      HYUND
      P
      14510
      ...
      BLACK
      0
      2001
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      87
      1294713218
      GFC8215
      NY
      PAS
      08/13/2013
      38
      SUBN
      CHEVR
      P
      0
      ...
      BLACK
      0
      2012
      -
      0
      NaN
      NaN
      NaN
      NaN
      NaN
    
  

46 rows × 43 columns



In [22]:

    
print(dof_violations_df.columns)
dof_violations_df['CODE'].head()









    



Index(['CODE', 'DEFINITION', 'Manhattan  96th St. & below', 'All Other Areas'], dtype='object')






    Out[22]:





0    10
1    11
2    12
3    13
4    14
Name: CODE, dtype: object



In [23]:

    
# violations_df.merge(dof_violations_df, left_on='Violation Code', right_on='CODE')



In [24]:

    
#print(dof_violations_df['CODE'][0])
#test = dof_violations_df['CODE'][0]

def to_int(test):
    try:
        # test = re.sub('$', '', test)
        test = test.strip("$")
        #print(test)
        test = int(test)
        #print(test, " is now an int")
        return test
    except:
        print(test, " coult not be converted to an int")
        return np.nan



In [25]:

    
dof_violations_df['CODE'] = dof_violations_df['CODE'].apply(to_int)









    



37-38  coult not be converted to an int

Join the data with the Parking Violations Code dataset from the NYC Open Data site.



In [26]:

    
violations_df = violations_df.merge(dof_violations_df, left_on='Violation Code', right_on='CODE')

How much money did NYC make off of parking violations?



In [27]:

    
violations_df.describe()









    



/Users/Monica/.virtualenvs/dataanalysis/lib/python3.5/site-packages/numpy/lib/function_base.py:3823: RuntimeWarning: Invalid value encountered in percentile
  RuntimeWarning)






    Out[27]:






  
    
      
      Summons Number
      Violation Code
      Street Code1
      Street Code2
      Street Code3
      Vehicle Expiration Date
      Violation Location
      Violation Precinct
      Issuer Precinct
      Issuer Code
      ...
      Violation Legal Code
      Unregistered Vehicle?
      Vehicle Year
      Feet From Curb
      Violation Post Code
      Violation Description
      No Standing or Stopping Violation
      Hydrant Violation
      Double Parking Violation
      CODE
    
  
  
    
      count
      4.400000e+01
      44.000000
      44.000000
      44.000000
      44.000000
      4.400000e+01
      43.000000
      44.000000
      44.000000
      44.000000
      ...
      0.0
      44.0
      44.000000
      44.000000
      0.0
      0.0
      0.0
      0.0
      0.0
      44.000000
    
    
      mean
      1.289335e+09
      30.113636
      31847.272727
      28144.340909
      28321.454545
      1.876975e+07
      61.674419
      60.272727
      279.386364
      679245.500000
      ...
      NaN
      0.0
      2007.090909
      0.227273
      NaN
      NaN
      NaN
      NaN
      NaN
      30.113636
    
    
      std
      4.022321e+06
      18.104626
      25830.368405
      20609.272005
      20557.940516
      5.135935e+06
      39.000440
      39.649842
      348.814018
      338298.083452
      ...
      NaN
      0.0
      5.753940
      1.361662
      NaN
      NaN
      NaN
      NaN
      NaN
      18.104626
    
    
      min
      1.283294e+09
      14.000000
      0.000000
      0.000000
      0.000000
      0.000000e+00
      1.000000
      0.000000
      0.000000
      0.000000
      ...
      NaN
      0.0
      1995.000000
      0.000000
      NaN
      NaN
      NaN
      NaN
      NaN
      14.000000
    
    
      25%
      1.286216e+09
      17.000000
      14505.000000
      13210.000000
      15578.750000
      2.014066e+07
      NaN
      20.000000
      0.000000
      343494.500000
      ...
      NaN
      0.0
      2002.000000
      0.000000
      NaN
      NaN
      NaN
      NaN
      NaN
      17.000000
    
    
      50%
      1.290054e+09
      24.000000
      27615.000000
      26965.000000
      25680.000000
      2.014093e+07
      NaN
      73.000000
      106.000000
      919856.000000
      ...
      NaN
      0.0
      2010.000000
      0.000000
      NaN
      NaN
      NaN
      NaN
      NaN
      24.000000
    
    
      75%
      1.293096e+09
      41.500000
      44675.000000
      40404.000000
      39870.750000
      2.014345e+07
      NaN
      85.000000
      458.500000
      930130.000000
      ...
      NaN
      0.0
      2012.000000
      0.000000
      NaN
      NaN
      NaN
      NaN
      NaN
      41.500000
    
    
      max
      1.294713e+09
      98.000000
      93230.000000
      74830.000000
      83330.000000
      2.015102e+07
      120.000000
      120.000000
      976.000000
      947582.000000
      ...
      NaN
      0.0
      2013.000000
      9.000000
      NaN
      NaN
      NaN
      NaN
      NaN
      98.000000
    
  

8 rows × 23 columns



In [28]:

    
violations_df.columns









    Out[28]:





Index(['Summons Number', 'Plate ID', 'Registration State', 'Plate Type',
       'Issue Date', 'Violation Code', 'Vehicle Body Type', 'Vehicle Make',
       'Issuing Agency', 'Street Code1', 'Street Code2', 'Street Code3',
       'Vehicle Expiration Date', 'Violation Location', 'Violation Precinct',
       'Issuer Precinct', 'Issuer Code', 'Issuer Command', 'Issuer Squad',
       'Violation Time', 'Time First Observed', 'Violation County',
       'Violation In Front Of Or Opposite', 'House Number', 'Street Name',
       'Intersecting Street', 'Date First Observed', 'Law Section',
       'Sub Division', 'Violation Legal Code', 'Days Parking In Effect    ',
       'From Hours In Effect', 'To Hours In Effect', 'Vehicle Color',
       'Unregistered Vehicle?', 'Vehicle Year', 'Meter Number',
       'Feet From Curb', 'Violation Post Code', 'Violation Description',
       'No Standing or Stopping Violation', 'Hydrant Violation',
       'Double Parking Violation', 'CODE', 'DEFINITION',
       'Manhattan  96th St. & below', 'All Other Areas'],
      dtype='object')



In [29]:

    
violations_df['Summons Number']









    Out[29]:





0     1283294138
1     1283294151
2     1283294175
3     1290834349
4     1293030739
5     1293100924
6     1293525625
7     1283294229
8     1286036800
9     1286246398
10    1286248000
11    1286807633
12    1291898384
13    1293101783
14    1293107761
15    1293525560
16    1283983631
17    1283983667
18    1283983679
19    1283983734
20    1283983825
21    1286123550
22    1286289841
23    1286799648
24    1286807475
25    1287320491
26    1289971754
27    1291896399
28    1293090530
29    1287320570
30    1288135920
31    1292755052
32    1292756792
33    1288259270
34    1290136210
35    1290834362
36    1291905236
37    1293101758
38    1294712779
39    1294712792
40    1294712901
41    1294712950
42    1293094389
43    1294712755
Name: Summons Number, dtype: int64



In [30]:

    
violations_df['Manhattan\xa0 96th St. & below']









    Out[30]:





0     $115 
1     $115 
2     $115 
3     $115 
4     $115 
5     $115 
6     $115 
7     $115 
8     $115 
9     $115 
10    $115 
11    $115 
12    $115 
13    $115 
14    $115 
15    $115 
16     $65 
17     $65 
18     $65 
19     $65 
20     $65 
21     $65 
22    $115 
23    $115 
24    $115 
25    $115 
26    $115 
27    $115 
28    $115 
29    $115 
30    $115 
31    $115 
32    $115 
33    $115 
34     $65 
35     $95 
36     $95 
37     $95 
38     $95 
39     $95 
40     $95 
41     $95 
42    $115 
43     $95 
Name: Manhattan  96th St. & below, dtype: object



In [31]:

    
violations_df['Manhattan\xa0 96th St. & below'] = violations_df['Manhattan\xa0 96th St. & below'].apply(to_int)



In [32]:

    
violations_df['Manhattan\xa0 96th St. & below'].sum()









    Out[32]:





4550



In [33]:

    
violations_df['All Other Areas'] = violations_df['All Other Areas'].apply(to_int)



In [34]:

    
violations_df['All Other Areas'].sum()









    Out[34]:





4520



In [35]:

    
violations_df['Manhattan\xa0 96th St. & below'].describe()









    Out[35]:





count     44.000000
mean     103.409091
std       18.545128
min       65.000000
25%       95.000000
50%      115.000000
75%      115.000000
max      115.000000
Name: Manhattan  96th St. & below, dtype: float64

What's the most lucrative kind of parking violation? Below lists all the ones that chaarge $115, which is the highest amount



In [36]:

    
violations_df.groupby('CODE')['All Other Areas'].mean().sort_values(ascending=False).head(8)









    Out[36]:





CODE
51.0    115
50.0    115
46.0    115
40.0    115
31.0    115
19.0    115
18.0    115
14.0    115
Name: All Other Areas, dtype: int64

The most frequent parking violation is:



In [37]:

    
import matplotlib.pyplot as plt
%matplotlib inline
freqNYviolations = violations_df.groupby('CODE')['All Other Areas'].count().sort_values(ascending=False).head().plot(kind="bar", color = ['#624ea7', '#599ad3', '#f9a65a', '#9e66ab', 'purple'])
freqNYviolations.set_title('Most Frequent NYC Parking Violations by Code')
freqNYviolations.set_xlabel('Parking Violation Code')
freqNYviolations.set_ylabel('Frequency of Citations')
plt.savefig('freqParkingViolations.png')



In [38]:

    
violations_df.groupby('CODE')['All Other Areas'].count().sort_values(ascending=False).head(1)









    Out[38]:





CODE
14.0    9
Name: All Other Areas, dtype: int64



In [39]:

    
# violations_df.sort_values('All Other Areas', ascending=False)

New Jersey has bad drivers, but does it have bad parkers, too?



In [40]:

    
nj_violations_df = violations_df[violations_df['Registration State'] != 'NJ']

print("There were", len(nj_violations_df['All Other Areas']), "parking violations in NYC by drivers registered in New Jersey")
print("These violations totaled", nj_violations_df['All Other Areas'].sum(), "of revenue")









    



There were 41 parking violations in NYC by drivers registered in New Jersey
These violations totaled 4230 of revenue

How much money does NYC make off of all non-New York vehicles?

Make a chart of the top few.



In [41]:

    
violations_df.groupby('Registration State')['All Other Areas'].sum().sort_values(ascending=False).head(8)









    Out[41]:





Registration State
NY    4000
NJ     290
VA     115
99     115
Name: All Other Areas, dtype: int64



In [42]:

    
import matplotlib.pyplot as plt
%matplotlib inline

nonny_violations_df = violations_df[violations_df['Registration State'] != 'NY']
nonny_violations_df = nonny_violations_df[violations_df['Registration State'] != '99']
nonNYviolations = nonny_violations_df.groupby('Registration State')['All Other Areas'].sum().sort_values(ascending=False).head(20).plot(kind="bar", color = ['#624ea7', '#599ad3', '#f9a65a', '#9e66ab', 'purple'])
nonNYviolations.set_title('Money Earned from NYC Parking Violations by State')
nonNYviolations.set_xlabel('Vehicle Registration State (NON-NYS)')
nonNYviolations.set_ylabel('Amount Earned')
plt.savefig('nonNYParkingViolations.png')









    



/Users/Monica/.virtualenvs/dataanalysis/lib/python3.5/site-packages/ipykernel/__main__.py:6: UserWarning: Boolean Series key will be reindexed to match DataFrame index.



In [43]:

    
import matplotlib.pyplot as plt
%matplotlib inline

nonny_violations_df = violations_df[violations_df['Registration State'] != 'NY']
nonny_violations_df = nonny_violations_df[violations_df['Registration State'] != '99']
freqnonNYviolations = nonny_violations_df.groupby('Registration State')['All Other Areas'].count().sort_values(ascending=False).head(20).plot(kind="bar", color = ['#624ea7', '#599ad3', '#f9a65a', '#9e66ab', 'purple'])
freqnonNYviolations.set_title('Frequency of NYC Parking Violations by State')
freqnonNYviolations.set_xlabel('Vehicle Registration State (NON-NYS)')
freqnonNYviolations.set_ylabel('Number of Citations')
plt.savefig('freqnonNYParkingViolations.png')









    



/Users/Monica/.virtualenvs/dataanalysis/lib/python3.5/site-packages/ipykernel/__main__.py:6: UserWarning: Boolean Series key will be reindexed to match DataFrame index.

What time of day do people usually get their tickets? You can break the day up into several blocks - for example 12am-6am, 6am-12pm, 12pm-6pm, 6pm-12am.

What's the average ticket cost in NYC?



In [48]:

    
print("The average ticket cost in NYC is $", violations_df['All Other Areas'].mean())









    



The average ticket cost in NYC is $ 102.727272727

Make a graph of the number of tickets per day.



In [51]:

    
violations_df.groupby('Issue Date')['All Other Areas'].count().sort_values(ascending=False)









    Out[51]:





Issue Date
08/04/2013    4
07/26/2013    4
07/18/2013    3
08/12/2013    2
07/30/2013    2
07/20/2013    2
07/23/2013    2
08/11/2013    2
07/29/2013    2
06/20/2013    2
08/07/2013    2
07/19/2013    2
07/05/2013    1
07/07/2013    1
06/24/2013    1
07/14/2013    1
06/07/2013    1
07/15/2013    1
07/24/2013    1
07/21/2013    1
07/25/2013    1
07/27/2013    1
08/01/2013    1
08/02/2013    1
08/05/2013    1
08/06/2013    1
05/24/2013    1
Name: All Other Areas, dtype: int64



In [ ]:

    
freqnonNYviolations = violations_df.groupby('Issue Date')['All Other Areas'].count().plot(kind="bar", color = ['#624ea7', '#599ad3', '#f9a65a', '#9e66ab', 'purple'])
freqnonNYviolations.set_title('Frequency of NYC Parking Violations by Date')
freqnonNYviolations.set_xlabel('Date')
freqnonNYviolations.set_ylabel('Number of Citations')
plt.savefig('datedfreqnonNYParkingViolations.png')

Make a graph of the amount of revenue collected per day.



In [55]:

    
freqnonNYviolations = violations_df.groupby('Issue Date')['All Other Areas'].sum().plot(kind="bar", color = ['#624ea7', '#599ad3', '#f9a65a', '#9e66ab', 'purple'])
freqnonNYviolations.set_title('Revenue of NYC Parking Violations by Date')
freqnonNYviolations.set_xlabel('Date')
freqnonNYviolations.set_ylabel('Revenue Generated from Citations')
plt.savefig('datedrevenueonNYParkingViolations.png')



In [ ]:

    
16. Manually construct a dataframe out of https://dmv.ny.gov/statistic/2015licinforce-web.pdf (only NYC boroughts - bronx, queens, manhattan, staten island, brooklyn), having columns for borough name, abbreviation, and number of licensed drivers.
17. What's the parking-ticket-$-per-licensed-driver in each borough of NYC? Do this with pandas and the dataframe you just made, not with your head!

	Summons Number	Plate ID	Registration State	Plate Type	Issue Date	Violation Code	Vehicle Body Type	Vehicle Make	Issuing Agency	Street Code1	...	Vehicle Color	Vehicle Year	Meter Number	Feet From Curb	Violation Post Code	Violation Description	No Standing or Stopping Violation	Hydrant Violation	Double Parking Violation
0	1283294138	GBB9093	NY	PAS	08/04/2013	46	SUBN	AUDI	P	37250	...	GREY	2013	-	0	NaN	NaN	NaN	NaN	NaN
1	1283294151	62416MB	NY	COM	08/04/2013	46	VAN	FORD	P	37290	...	WHITE	2012	-	0	NaN	NaN	NaN	NaN	NaN
3	1283294175	63009MA	NY	COM	08/05/2013	46	VAN	FORD	P	37270	...	WHITE	2010	-	0	NaN	NaN	NaN	NaN	NaN
4	1283294187	91648MC	NY	COM	08/08/2013	41	TRLR	GMC	P	37240	...	BROWN	2012	-	0	NaN	NaN	NaN	NaN	NaN
6	1283294229	GCR2838	NY	PAS	08/11/2013	14	VAN	NaN	P	37250	...	GN	2011	-	0	NaN	NaN	NaN	NaN	NaN
8	1283983631	GBH9379	NY	PAS	08/07/2013	24	SDN	TOYOT	X	63430	...	WHITE	2001	-	0	NaN	NaN	NaN	NaN	NaN
9	1283983667	MCL78B	NJ	PAS	07/18/2013	24	SDN	SUBAR	H	0	...	BLACK	2005	-	0	NaN	NaN	NaN	NaN	NaN
10	1283983679	M367CN	NY	PAS	07/18/2013	24	SDN	HYUND	H	0	...	BLUE	2010	-	0	NaN	NaN	NaN	NaN	NaN
11	1283983734	GAR6813	NY	PAS	07/18/2013	24	SDN	TOYOT	H	0	...	TAN	1998	-	0	NaN	NaN	NaN	NaN	NaN
13	1283983825	GAC2703	NY	PAS	08/12/2013	24	SDN	NISSA	X	23230	...	WHITE	2012	-	0	NaN	NaN	NaN	NaN	NaN
14	1286036800	40793JY	NY	COM	07/05/2013	14	VAN	CHEVR	P	34190	...	GREEN	1995	-	0	NaN	NaN	NaN	NaN	NaN
15	1286123550	GAD1485	NY	PAS	08/12/2013	20	SDN	VOLKS	T	28930	...	WHITE	2012	-	0	NaN	NaN	NaN	NaN	NaN
16	1286246398	GFC5338	NY	PAS	07/26/2013	14	SDN	TOYOT	T	0	...	GRAY	2010	-	0	NaN	NaN	NaN	NaN	NaN
18	1286248000	GJA3452	NY	PAS	07/23/2013	14	SDN	KIA	T	73690	...	WHITE	2013	-	0	NaN	NaN	NaN	NaN	NaN
21	1286289841	GAV9235	NY	PAS	07/20/2013	50	SDN	HONDA	T	0	...	GREY	2012	-	0	NaN	NaN	NaN	NaN	NaN
23	1286799648	GDE3973	NY	PAS	07/20/2013	40	SDN	TOYOT	T	0	...	TAN	2003	-	9	NaN	NaN	NaN	NaN	NaN
25	1286807475	GGL6608	NY	PAS	07/26/2013	40	SDN	NISSA	T	73650	...	WHT	2013	-	1	NaN	NaN	NaN	NaN	NaN
26	1286807633	84301JU	NY	COM	07/19/2013	14	P-U	DODGE	X	63030	...	WHITE	2001	-	0	NaN	NaN	NaN	NaN	NaN
28	1287320491	50097JX	NY	COM	08/07/2013	40	VAN	MERCU	T	23190	...	RED	2003	-	0	NaN	NaN	NaN	NaN	NaN
29	1287320570	GEJ9154	NY	PAS	08/06/2013	51	VAN	TOYOT	X	93230	...	GREY	2013	-	0	NaN	NaN	NaN	NaN	NaN
35	1288135920	GFM8884	NY	OMS	07/24/2013	19	SUBN	ME/BE	X	0	...	BLU	2013	-	0	NaN	NaN	NaN	NaN	NaN
36	1288259270	43517LA	NY	PAS	08/02/2013	18	SDN	CADIL	P	73720	...	WHT	2004	-	0	NaN	NaN	NaN	NaN	NaN
38	1289971754	XDB4574	VA	PAS	08/01/2013	40	VAN	TOYOT	P	13610	...	RED	2007	-	0	NaN	NaN	NaN	NaN	NaN
39	1290136210	GGW7930	NY	PAS	07/25/2013	74	SUBN	MAZDA	P	56600	...	TAN	2002	-	0	NaN	NaN	NaN	NaN	NaN
40	1290834349	429J6JM	NY	COM	07/23/2013	46	DELV	INTER	P	0	...	BROWN	1996	-	0	NaN	NaN	NaN	NaN	NaN
41	1290834362	GBC9079	NY	PAS	07/27/2013	98	SDN	MERCU	P	14490	...	GREY	2012	-	0	NaN	NaN	NaN	NaN	NaN
47	1291896399	G24BLB	NJ	PAS	07/19/2013	40	SDN	INFIN	P	41900	...	BLACK	2011	-	0	NaN	NaN	NaN	NaN	NaN
48	1291898384	GER8956	NY	SRF	07/26/2013	14	SDN	CHEVR	P	53000	...	GRAY	2012	-	0	NaN	NaN	NaN	NaN	NaN
49	1291905236	85796MC	NY	COM	07/26/2013	17	DELV	ISUZU	P	33990	...	WHITE	2013	-	0	NaN	NaN	NaN	NaN	NaN
50	1292755052	GBB7685	NY	PAS	07/14/2013	19	SDN	LEXUS	X	59990	...	WHITE	2013	-	0	NaN	NaN	NaN	NaN	NaN
53	1292756792	GDX8921	99	PAS	07/15/2013	19	SDN	HONDA	X	28790	...	SILVER	2003	-	0	NaN	NaN	NaN	NaN	NaN
55	1293030739	GHG2484	NY	PAS	08/11/2013	46	NaN	FORD	P	26440	...	BLACK	2013	-	0	NaN	NaN	NaN	NaN	NaN
56	1293090530	GES3519	NY	PAS	07/07/2013	40	SDN	HONDA	F	70630	...	BLACK	1997	-	0	NaN	NaN	NaN	NaN	NaN
57	1293094389	GDS9810	NJ	PAS	06/20/2013	31	SDN	BMW	P	24890	...	SILVER	2007	-	0	NaN	NaN	NaN	NaN	NaN
58	1293100924	64195MC	NY	COM	06/20/2013	46	VAN	WHITE	F	81330	...	WHITE	1998	-	0	NaN	NaN	NaN	NaN	NaN
61	1293101758	GCF5651	NY	PAS	05/24/2013	17	SUBN	CHRYS	F	24990	...	GREEN	2010	-	0	NaN	NaN	NaN	NaN	NaN
62	1293101783	GCF5651	NY	PAS	06/24/2013	14	SUBN	CHRYS	F	24990	...	TAN	2010	-	0	NaN	NaN	NaN	NaN	NaN
63	1293107761	4N11B	NY	OMT	06/07/2013	14	VAN	FORD	F	34310	...	YELLO	2012	-	0	NaN	NaN	NaN	NaN	NaN
64	1293525560	T632455C	NY	SRF	07/29/2013	14	SDN	LINCO	P	39430	...	GRY	2005	-	0	NaN	NaN	NaN	NaN	NaN
65	1293525625	GGP1847	NY	PAS	07/29/2013	46	SUBN	HONDA	P	0	...	WHT	2013	-	0	NaN	NaN	NaN	NaN	NaN
73	1294712755	GHF1267	NY	PAS	07/21/2013	16	VAN	CHRYS	P	14510	...	BLUE	2002	-	0	NaN	NaN	NaN	NaN	NaN
74	1294712779	578032	NY	PAS	07/30/2013	17	SDN	AUDI	P	14510	...	BLACK	2010	-	0	NaN	NaN	NaN	NaN	NaN
76	1294712792	GCX7943	NY	PAS	07/30/2013	17	SDN	AUDI	P	14510	...	BLACK	1999	-	0	NaN	NaN	NaN	NaN	NaN
82	1294712901	GDZ4899	NY	PAS	08/04/2013	17	SUBN	CHRYS	P	14510	...	RED	2002	-	0	NaN	NaN	NaN	NaN	NaN
84	1294712950	GCF7473	NY	PAS	08/04/2013	17	SDN	HYUND	P	14510	...	BLACK	2001	-	0	NaN	NaN	NaN	NaN	NaN
87	1294713218	GFC8215	NY	PAS	08/13/2013	38	SUBN	CHEVR	P	0	...	BLACK	2012	-	0	NaN	NaN	NaN	NaN	NaN

	Summons Number	Violation Code	Street Code1	Street Code2	Street Code3	Vehicle Expiration Date	Violation Location	Violation Precinct	Issuer Precinct	Issuer Code	...	Violation Legal Code	Unregistered Vehicle?	Vehicle Year	Feet From Curb	Violation Post Code	Violation Description	No Standing or Stopping Violation	Hydrant Violation	Double Parking Violation	CODE
count	4.400000e+01	44.000000	44.000000	44.000000	44.000000	4.400000e+01	43.000000	44.000000	44.000000	44.000000	...	0.0	44.0	44.000000	44.000000	0.0	0.0	0.0	0.0	0.0	44.000000
mean	1.289335e+09	30.113636	31847.272727	28144.340909	28321.454545	1.876975e+07	61.674419	60.272727	279.386364	679245.500000	...	NaN	0.0	2007.090909	0.227273	NaN	NaN	NaN	NaN	NaN	30.113636
std	4.022321e+06	18.104626	25830.368405	20609.272005	20557.940516	5.135935e+06	39.000440	39.649842	348.814018	338298.083452	...	NaN	0.0	5.753940	1.361662	NaN	NaN	NaN	NaN	NaN	18.104626
min	1.283294e+09	14.000000	0.000000	0.000000	0.000000	0.000000e+00	1.000000	0.000000	0.000000	0.000000	...	NaN	0.0	1995.000000	0.000000	NaN	NaN	NaN	NaN	NaN	14.000000
25%	1.286216e+09	17.000000	14505.000000	13210.000000	15578.750000	2.014066e+07	NaN	20.000000	0.000000	343494.500000	...	NaN	0.0	2002.000000	0.000000	NaN	NaN	NaN	NaN	NaN	17.000000
50%	1.290054e+09	24.000000	27615.000000	26965.000000	25680.000000	2.014093e+07	NaN	73.000000	106.000000	919856.000000	...	NaN	0.0	2010.000000	0.000000	NaN	NaN	NaN	NaN	NaN	24.000000
75%	1.293096e+09	41.500000	44675.000000	40404.000000	39870.750000	2.014345e+07	NaN	85.000000	458.500000	930130.000000	...	NaN	0.0	2012.000000	0.000000	NaN	NaN	NaN	NaN	NaN	41.500000
max	1.294713e+09	98.000000	93230.000000	74830.000000	83330.000000	2.015102e+07	120.000000	120.000000	976.000000	947582.000000	...	NaN	0.0	2013.000000	9.000000	NaN	NaN	NaN	NaN	NaN	98.000000